#!/usr/bin/python
# -*- coding: utf-8 -*-

#$ -l h_rt=12:00:00
#$ -l virtual_free=500M
#$ -j y
#$ -o $HOME/wlm.out
#$ -N wlm

import sys
#sys.path.append('/home/artem/pywikipedia')
#sys.path.append('/home/artem/Dropbox/wiki/bot/pywikipedia')
sys.path.append('pywikipedia')

import wikipedia
import urllib2
import re

from BeautifulSoup import BeautifulSoup

items = {}
for i in range (0,2):
	if i != 0:
		url = 'http://kulturnoe-nasledie.ru/search.php?query=%CD%C8%C6%C5%C3%CE%D0%CE%C4%D1%CA%C0%DF%20%CE%C1%CB%C0%D1%D2%DC&page=' + str(i)
	else:
		url = 'http://kulturnoe-nasledie.ru/search.php?query=%CD%C8%C6%C5%C3%CE%D0%CE%C4%D1%CA%C0%DF%20%CE%C1%CB%C0%D1%D2%DC' 
	print u"Retrieving " + url
	webpage = urllib2.urlopen(url)
	print u"\nDone!"
	
	soup = BeautifulSoup(webpage)
	
	for table in soup.findAll('table', attrs={'class': 'list'}):
		okn = table.findAll('tr')[0].findAll('td')[0].b.contents[0]
		href = 'http://kulturnoe-nasledie.ru/monuments.php?id=' + okn
		descr = table.findAll('tr')[0].findAll('td')[0].a.span.contents[0]
		
		print u"Retrieving " + href
		webpage2 = urllib2.urlopen(href)
		print u"\nDone!"
		soup2 = BeautifulSoup(webpage)
		
		row = {}
		for tr in soup2.findAll('table', atrrs={'cellspacing':"0", 'cellpadding':"6", 'border':"0", 'style':"width: 800px;", 'align':"center"}).findAll('tr'):
			row[tr.findAll('td')[0].contents[0].strip(':')] = tr.findAll('td')[1].contents[0]
		
		print row
		
		items[okn] = [okn, href, descr]

for item in items:
	print items[item][1]
